#ifndef COUL_MSM_H_
#define COUL_MSM_H_
#include "esmd_types.h"
#include "comm.h"
#define MAX_MSM_LEVELS 20
#define MIN_MSM_PER_PROC 4
typedef struct msm_comm_entry_1d {
  int pair, delta, start, end;
} msm_comm_entry_1d_t;
typedef struct msm_comm_schedule_1d {
  msm_comm_entry_1d_t *send, *recv;
  int nsend, nrecv, lsend, lrecv;
} msm_comm_schedule_1d_t;
typedef struct msm_1d_part {
  int order, ndirect, nlev, maxlev, np;
  int max_nsend, max_nrecv, max_lsend, max_lrecv;
  real glen, llen;
  real skin, cut;
  real delinv[MAX_MSM_LEVELS], del[MAX_MSM_LEVELS];
  msm_comm_schedule_1d_t rev_q_sched[MAX_MSM_LEVELS], fwd_q_sched[MAX_MSM_LEVELS], fwd_e_sched[MAX_MSM_LEVELS];
  int nproc[MAX_MSM_LEVELS], nmsm[MAX_MSM_LEVELS], pcnt[MAX_MSM_LEVELS], prem[MAX_MSM_LEVELS];
  int all_lo[MAX_MSM_LEVELS], all_hi[MAX_MSM_LEVELS], local_lo[MAX_MSM_LEVELS], local_hi[MAX_MSM_LEVELS];
  int qout_lo[MAX_MSM_LEVELS], qout_hi[MAX_MSM_LEVELS], qin_lo[MAX_MSM_LEVELS], qin_hi[MAX_MSM_LEVELS];
} msm_1d_part_t;

typedef union msm_1d_range {
  int v[6];
  struct {
    int qin_lo, qin_hi, qout_lo, qout_hi, local_lo, local_hi;
  } s;
} msm_1d_range_t;
typedef enum msm_1d_range_field {
  E_QIN_LO,
  E_QIN_HI,
  E_QOUT_LO,
  E_QOUT_HI,
  E_LOCAL_LO,
  E_LOCAL_HI
} msm_1d_range_field;

typedef struct msm_grid {
  box<real> lbox, gbox;
  msm_1d_part_t xpart, ypart, zpart;
  vec<int> nall[MAX_MSM_LEVELS], nlocal[MAX_MSM_LEVELS];
  vec<int> max_nall;
  vec<int> low[MAX_MSM_LEVELS], high[MAX_MSM_LEVELS];
  real *q[MAX_MSM_LEVELS], *e[MAX_MSM_LEVELS];
  real rcut;
  vec<real> skin;
  int maxlev, order;
  real *g_direct[MAX_MSM_LEVELS], *v_direct[MAX_MSM_LEVELS][6], *vs_direct[MAX_MSM_LEVELS];
  real *send_buf, *recv_buf;
  MPI_Request *rreq, *sreq;
  MPI_Status *rstat, *sstat;
  MPI_Comm xcomm, ycomm, zcomm;
  int active[MAX_MSM_LEVELS];
  real coul_const, qsqsum;
  //real ecoul, virial[6];
  mpp_t *mpp;
} msm_grid_t;
static const real GCONS[7][7] = {
    {},
    {},
    {15.0 / 8., -5.0 / 4., 3.0 / 8.},
    {35.0 / 16., -35.0 / 16., 21.0 / 16., -5.0 / 16.},
    {315.0 / 128., -105.0 / 32., 189.0 / 64., -45.0 / 32., 35.0 / 128.},
    {693.0 / 256., -1155.0 / 256., 693.0 / 128., -495.0 / 128., 385.0 / 256., -63.0 / 256.},
    {3003.0 / 1024., -3003.0 / 512., 9009.0 / 1024., -2145.0 / 256., 5005.0 / 1024., -819.0 / 512., 231.0 / 1024.}};
static const real DGCONS[7][6] = {
    {},
    {},
    {-5.0 / 2., 3.0 / 2.},
    {-35.0 / 8., 21.0 / 4., -15.0 / 8.},
    {-105.0 / 16., 189.0 / 16., -135.0 / 16., 35.0 / 16.},
    {-1155.0 / 128., 693.0 / 32., -1485.0 / 64., 385.0 / 32., -315.0 / 128.},
    {-3003.0 / 256., 9009.0 / 256., -6435.0 / 128., 5005.0 / 128., -4095.0 / 256., 693.0 / 256.}};
static const real PHICOEF[6][5][10] = {
    {},
    {},
    {{3. / 2., -5. / 2., 0. / 2., 2. / 2.},
     {-1. / 2., 5. / 2., -8. / 2., 4. / 2.}},
    {{-5. / 12., 13. / 12., 5. / 12., -25. / 12., 0. / 12., 12. / 12.},
     {5. / 24., -39. / 24., 105. / 24., -105. / 24., 10. / 24., 24. / 24.},
     {-1. / 24., 13. / 24., -65. / 24., 155. / 24., -174. / 24., 72. / 24.}},
    {{7. / 144., -25. / 144., -35. / 144., 161. / 144., 28. / 144., -280. / 144., 0. / 144., 144. / 144.},
     {-7. / 240., 75. / 240., -273. / 240., 315. / 240., 252. / 240., -630. / 240., 28. / 240., 240. / 240.},
     {7. / 720., -125. / 720., 889. / 720., -3185. / 720., 5908. / 720., -4970. / 720., 756. / 720., 720. / 720.},
     {-1. / 720., 25. / 720., -259. / 720., 1435. / 720., -4564. / 720., 8260. / 720., -7776. / 720., 2880. / 720.}},
    {{-9. / 2880., 41. / 2880., 126. / 2880., -654. / 2880., -441. / 2880., 3129. / 2880., 324. / 2880., -5396. / 2880., 0. / 2880., 2880. / 2880.},
     {3. / 1440., -41. / 1440., 180. / 1440., -138. / 1440., -945. / 1440., 1911. / 1440., 690. / 1440., -3172. / 1440., 72. / 1440., 1440. / 1440.},
     {-9. / 10080., 205. / 10080., -1872. / 10080., 8610. / 10080., -19845. / 10080., 15645. / 10080., 18342. / 10080., -34540. / 10080., 3384. / 10080., 10080. / 10080.},
     {9. / 40320., -287. / 40320., 3870. / 40320., -28686. / 40320., 126945. / 40320., -339423. / 40320., 523080. / 40320., -397684. / 40320., 71856. / 40320., 40320. / 40320.},
     {-1. / 40320., 41. / 40320., -726. / 40320., 7266. / 40320., -45129. / 40320., 179529. / 40320., -454544. / 40320., 700204. / 40320., -588240. / 40320., 201600. / 40320.}}};
static const real DPHICOEF[6][5][10] = {
    {},
    {},
    {{9. / 2., -10. / 2., 0. / 2.},
     {-3. / 2., 10. / 2., -8. / 2.}},
    {{-25. / 12., 52. / 12., 15. / 12., -50. / 12., 0. / 12.},
     {25. / 24., -156. / 24., 315. / 24., -210. / 24., 10. / 24.},
     {-5. / 24., 52. / 24., -195. / 24., 310. / 24., -174. / 24.}},
    {{49. / 144., -150. / 144., -175. / 144., 644. / 144., 84. / 144., -560. / 144., 0. / 144.},
     {-49. / 240., 450. / 240., -1365. / 240., 1260. / 240., 756. / 240., -1260. / 240., 28. / 240.},
     {49. / 720., -750. / 720., 4445. / 720., -12740. / 720., 17724. / 720., -9940. / 720., 756. / 720.},
     {-7. / 720., 150. / 720., -1295. / 720., 5740. / 720., -13692. / 720., 16520. / 720., -7776. / 720.}},
    {{-81. / 2880., 328. / 2880., 882. / 2880., -3924. / 2880., -2205. / 2880., 12516. / 2880., 972. / 2880., -10792. / 2880., 0. / 2880.},
     {27. / 1440., -328. / 1440., 1260. / 1440., -828. / 1440., -4725. / 1440., 7644. / 1440., 2070. / 1440., -6344. / 1440., 72. / 1440.},
     {-81. / 10080., 1640. / 10080., -13104. / 10080., 51660. / 10080., -99225. / 10080., 62580. / 10080., 55026. / 10080., -69080. / 10080., 3384. / 10080.},
     {81. / 40320., -2296. / 40320., 27090. / 40320., -172116. / 40320., 634725. / 40320., -1357692. / 40320., 1569240. / 40320., -795368. / 40320., 71856. / 40320.},
     {-9. / 40320., 328. / 40320., -5082. / 40320., 43596. / 40320., -225645. / 40320., 718116. / 40320., -1363632. / 40320., 1400408. / 40320., -588240. / 40320.}}};
INLINE real msm_gamma(real rho, int order) {
  if (rho < 1.0) {
    real rho2 = rho * rho;
    const real *gcons = GCONS[order >> 1];
    real g = gcons[order >> 1];
    for (int i = (order >> 1) - 1; i >= 0; i --){
      g = g * rho2 + gcons[i];
    }
    return g;
  } else {
    return 1. / rho;
  }
}

INLINE real msm_dgamma(real rho, int order) {
  if (rho < 1.0) {
    real rho2 = rho * rho;
    const real *dgcons = DGCONS[order >> 1];
    real dg = dgcons[(order >> 1) - 1];
    for (int i = (order >> 1) - 2; i >= 0; i --) {
      dg = dg * rho2 + dgcons[i];
    }
    return dg * rho;
  } else {
    return -1. / (rho * rho);
  }
}

INLINE void coul_msm(real r2, real inv_r2, real r, real inv_r, real inv_rcut, real inv_rcut2, int order, real kqq, real *ret_u, real *ret_dudr){
  real egamma = 1. - (r * inv_rcut) * msm_gamma(r * inv_rcut, order);
  real fgamma = 1. + (r2 * inv_rcut2) * msm_dgamma(r * inv_rcut, order);
  *ret_dudr = kqq * inv_r * fgamma * inv_r2;
  *ret_u = kqq * inv_r * egamma;
}

//function signatures
void get_msm_1d_range(msm_1d_range_t *range, msm_1d_part_t *part, int p, int lev);
void generate_comm_schedule(msm_comm_schedule_1d_t *sched, msm_1d_part_t *part, int recv, int send, int p, int lev);
void msm_1d_part_init(msm_1d_part_t *part, int np, int p, int order, int maxlev, int nlev, real glen, real cut, real skin);
void get_gv_direct(msm_grid_t *grid, int lev);
void msm_grid_init(msm_grid_t *grid, mpp_t *mpp, vec<int> * nlev, int order, real rcut, vec<real> * skin, real coul_const);
void compute_phi1(vec<real> * ret, vec<real> * d, int order);
void compute_dphi1(vec<real> * ret, vec<real> * d, int order);
void compute_phis(vec<real> * ret, vec<real> * d, int order);
void compute_dphis(vec<real> * ret, vec<real> * d, int order);
size_t pack_grid_brick(real *buf, real *q, vec<int> * low, vec<int> * nall, box<int> * box);
size_t unpack_add_grid_brick(real *buf, real *q, vec<int> * low, vec<int> * nall, box<int> * box);
size_t unpack_copy_grid_brick(real *buf, real *q, vec<int> * low, vec<int> * nall, box<int> * box);
void make_recv_requests(msm_comm_schedule_1d_t *sched, msm_grid_t *grid, int area, int axis, MPI_Comm comm);
void pack_send_grid(msm_comm_schedule_1d_t *sched, msm_grid_t *grid, real *g, box<int> * box0, int axis, int lev, MPI_Comm comm);
void recv_add_grid(msm_comm_schedule_1d_t *sched, msm_grid_t *grid, real *g, box<int> * box0, int axis, int lev, MPI_Comm comm);
void recv_copy_grid(msm_comm_schedule_1d_t *sched, msm_grid_t *grid, real *g, box<int> * box0, int axis, int lev, MPI_Comm comm);
void reverse_q(msm_grid_t *grid, mpp_t *mpp, real *g, int lev);
void forward_q(msm_grid_t *grid, mpp_t *mpp, real *g, int lev);
void forward_e(msm_grid_t *grid, mpp_t *mpp, real *g, int lev);
void restriction(msm_grid_t *grid, int lev);
void direct(msm_grid_t *grid, int lev, mdstat_t *stat);
void prolong(msm_grid_t *grid, int lev);
void cell2qgrid(cellgrid_t *cgrid, msm_grid_t *mgrid);
void egrid2cell(cellgrid_t *cgrid, msm_grid_t *mgrid);
void elocalgrid2cell(cellgrid_t *cgrid, msm_grid_t *mgrid);
void msm_compute(cellgrid_t *cgrid, msm_grid_t *mgrid, void *param, mdstat_t *gstat);
//end function signatures
#endif
